## Loading required package: lattice
## Loading required package: ggplot2
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## gdata: read.xls support for 'XLS' (Excel 97-2004) files ENABLED.
##
## gdata: read.xls support for 'XLSX' (Excel 2007+) files ENABLED.
##
## Attaching package: 'gdata'
## The following object is masked from 'package:stats':
##
## nobs
## The following object is masked from 'package:utils':
##
## object.size
## The following object is masked from 'package:base':
##
## startsWith
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:gdata':
##
## combine
## Loading required package: car
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
## Loading required package: magrittr
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
summary(dataset)
## Customer.Lifetime.Value State Response Coverage
## Min. : 1898 Arizona :1703 No :7826 Basic :5568
## 1st Qu.: 3994 California:3150 Yes:1308 Extended:2742
## Median : 5780 Nevada : 882 Premium : 824
## Mean : 8005 Oregon :2601
## 3rd Qu.: 8962 Washington: 798
## Max. :83325
##
## Education EmploymentStatus Gender
## Bachelor :2748 Disabled : 405 F:4658
## College :2681 Employed :5698 M:4476
## Doctor : 342 Medical Leave: 432
## High School or Below:2622 Retired : 282
## Master : 741 Unemployed :2317
##
##
## Income Location.Code Marital.Status Monthly.Premium.Auto
## Min. : 0 Rural :1773 Divorced:1369 Min. : 61.00
## 1st Qu.: 0 Suburban:5779 Married :5298 1st Qu.: 68.00
## Median :33890 Urban :1582 Single :2467 Median : 83.00
## Mean :37657 Mean : 93.22
## 3rd Qu.:62320 3rd Qu.:109.00
## Max. :99981 Max. :298.00
##
## Months.Since.Last.Claim Months.Since.Policy.Inception
## Min. : 0.0 Min. : 0.00
## 1st Qu.: 6.0 1st Qu.:24.00
## Median :14.0 Median :48.00
## Mean :15.1 Mean :48.06
## 3rd Qu.:23.0 3rd Qu.:71.00
## Max. :35.0 Max. :99.00
##
## Number.of.Open.Complaints Number.of.Policies Policy.Type
## Min. :0.0000 Min. :1.000 Corporate Auto:1968
## 1st Qu.:0.0000 1st Qu.:1.000 Personal Auto :6788
## Median :0.0000 Median :2.000 Special Auto : 378
## Mean :0.3844 Mean :2.966
## 3rd Qu.:0.0000 3rd Qu.:4.000
## Max. :5.0000 Max. :9.000
##
## Policy Renew.Offer.Type Sales.Channel
## Personal L3 :3426 Offer1:3752 Agent :3477
## Personal L2 :2122 Offer2:2926 Branch :2567
## Personal L1 :1240 Offer3:1432 Call Center:1765
## Corporate L3:1014 Offer4:1024 Web :1325
## Corporate L2: 595
## Corporate L1: 359
## (Other) : 378
## Total.Claim.Amount Vehicle.Class Vehicle.Size
## Min. : 0.099 Four-Door Car:4621 Large : 946
## 1st Qu.: 272.258 Luxury Car : 163 Medsize:6424
## Median : 383.945 Luxury SUV : 184 Small :1764
## Mean : 434.089 Sports Car : 484
## 3rd Qu.: 547.515 SUV :1796
## Max. :2893.240 Two-Door Car :1886
##
sapply(dataset, function(x) sum(is.na(x)))
## Customer.Lifetime.Value State
## 0 0
## Response Coverage
## 0 0
## Education EmploymentStatus
## 0 0
## Gender Income
## 0 0
## Location.Code Marital.Status
## 0 0
## Monthly.Premium.Auto Months.Since.Last.Claim
## 0 0
## Months.Since.Policy.Inception Number.of.Open.Complaints
## 0 0
## Number.of.Policies Policy.Type
## 0 0
## Policy Renew.Offer.Type
## 0 0
## Sales.Channel Total.Claim.Amount
## 0 0
## Vehicle.Class Vehicle.Size
## 0 0




#Treated Outliers



## Data Exploration



#Customer Lifetime Value is dispropotionately high for those with
#just one policy and similar in distribution for others
dataset$Number.of.Policies <- as.factor(ifelse(dataset$Number.of.Policies>1, ifelse(dataset$Number.of.Policies>3, ">3", "2-3"), "<2"))
a = ggplot(dataset, aes(x = Customer.Lifetime.Value))
a + geom_density(aes(y = ..count..,color = State))

#California and Oregon have highest number of customer. Most relevant levels
a + geom_density(aes(y = ..count..,color = Response))

#Most customers respond "No"
a + geom_density(aes(y = ..count..,color = Coverage))

#Basic coverage most relevant, followed by extended, then Premium
a + geom_density(aes(y = ..count..,color = Education))

#Bachelors, College and HS educated customers most relevant.
#Masters and Doctors less so
a + geom_density(aes(y = ..count..,color = EmploymentStatus))

#Employed and Unemployed most relevant. Rest less so
a + geom_density(aes(y = ..count..,color = Gender))

#No significant variation with factors
a + geom_density(aes(y = ..count..,color = Location.Code))

#Suburban customers are most relevant.
a + geom_density(aes(y = ..count..,color = Marital.Status))

#Married customers are most relevat. Followed by Single and Divorced.
a + geom_density(aes(y = ..count..,color = Policy.Type))

#Personal Auto is most relevant. Special Auto least
a + geom_density(aes(y = ..count..,color = Policy))

#Same as above. Personal L3 is most relevant
a + geom_density(aes(y = ..count..,color = Renew.Offer.Type))

#Offer 1 and 2 are most relevant.
a + geom_density(aes(y = ..count..,color = Sales.Channel))

#Agent sales is most relevant. Followed by Branch, then CallCenter and Web
a + geom_density(aes(y = ..count..,color = Vehicle.Class))

#4-Door cars are most relevant
a + geom_density(aes(y = ..count..,color = Vehicle.Size))

#Medsize vehicles are most relevant
a + geom_density(aes(y = ..count..,color = Number.of.Policies))

#Low value customers have just 1 policy. High value have 2-3. Mid-Value Customers have >3
#Create Test and Train data
set.seed(100)
Train1 = createDataPartition(dataset[,1], p=0.7, list = FALSE)
trainData = dataset[Train1,]
testData = dataset[-Train1,]
model = step(lm(Customer.Lifetime.Value~., data = trainData), direction = "both")
## Start: AIC=72463.69
## Customer.Lifetime.Value ~ State + Response + Coverage + Education +
## EmploymentStatus + Gender + Income + Location.Code + Marital.Status +
## Monthly.Premium.Auto + Months.Since.Last.Claim + Months.Since.Policy.Inception +
## Number.of.Open.Complaints + Number.of.Policies + Policy.Type +
## Policy + Renew.Offer.Type + Sales.Channel + Total.Claim.Amount +
## Vehicle.Class + Vehicle.Size
##
##
## Step: AIC=72463.69
## Customer.Lifetime.Value ~ State + Response + Coverage + Education +
## EmploymentStatus + Gender + Income + Location.Code + Marital.Status +
## Monthly.Premium.Auto + Months.Since.Last.Claim + Months.Since.Policy.Inception +
## Number.of.Open.Complaints + Number.of.Policies + Policy +
## Renew.Offer.Type + Sales.Channel + Total.Claim.Amount + Vehicle.Class +
## Vehicle.Size
##
## Df Sum of Sq RSS AIC
## - Policy 8 1.0041e+07 7.6591e+09 72454
## - Vehicle.Class 3 1.5668e+06 7.6506e+09 72459
## - State 4 5.4782e+06 7.6546e+09 72459
## - Sales.Channel 3 3.4035e+06 7.6525e+09 72460
## - Education 4 7.4454e+06 7.6565e+09 72461
## - Renew.Offer.Type 3 4.7181e+06 7.6538e+09 72461
## - Location.Code 2 1.7248e+06 7.6508e+09 72461
## - Total.Claim.Amount 1 1.6308e+06 7.6507e+09 72463
## - Months.Since.Last.Claim 1 2.9448e+06 7.6520e+09 72464
## <none> 7.6491e+09 72464
## - Income 1 3.2754e+06 7.6523e+09 72464
## - Gender 1 3.4364e+06 7.6525e+09 72464
## - Vehicle.Size 2 7.7646e+06 7.6568e+09 72465
## - Response 1 9.0154e+06 7.6581e+09 72468
## - Coverage 2 1.5242e+07 7.6643e+09 72470
## - Months.Since.Policy.Inception 1 1.6260e+07 7.6653e+09 72472
## - Marital.Status 2 2.1758e+07 7.6708e+09 72474
## - Number.of.Open.Complaints 1 3.6359e+07 7.6854e+09 72486
## - EmploymentStatus 4 6.5316e+07 7.7144e+09 72499
## - Monthly.Premium.Auto 1 4.6323e+08 8.1123e+09 72761
## - Number.of.Policies 2 2.3946e+10 3.1595e+10 79677
##
## Step: AIC=72454.37
## Customer.Lifetime.Value ~ State + Response + Coverage + Education +
## EmploymentStatus + Gender + Income + Location.Code + Marital.Status +
## Monthly.Premium.Auto + Months.Since.Last.Claim + Months.Since.Policy.Inception +
## Number.of.Open.Complaints + Number.of.Policies + Renew.Offer.Type +
## Sales.Channel + Total.Claim.Amount + Vehicle.Class + Vehicle.Size
##
## Df Sum of Sq RSS AIC
## - Vehicle.Class 3 1.3873e+06 7.6605e+09 72449
## - State 4 5.7719e+06 7.6649e+09 72450
## - Sales.Channel 3 3.4378e+06 7.6626e+09 72451
## - Renew.Offer.Type 3 4.5042e+06 7.6636e+09 72451
## - Education 4 7.6361e+06 7.6667e+09 72451
## - Location.Code 2 1.8950e+06 7.6610e+09 72452
## - Total.Claim.Amount 1 1.7253e+06 7.6608e+09 72454
## <none> 7.6591e+09 72454
## - Months.Since.Last.Claim 1 3.0747e+06 7.6622e+09 72454
## - Income 1 3.3096e+06 7.6624e+09 72455
## - Gender 1 3.4345e+06 7.6625e+09 72455
## - Vehicle.Size 2 7.5778e+06 7.6667e+09 72455
## + Policy.Type 2 1.8366e+06 7.6573e+09 72457
## - Response 1 9.2601e+06 7.6684e+09 72459
## - Coverage 2 1.5890e+07 7.6750e+09 72461
## - Months.Since.Policy.Inception 1 1.6535e+07 7.6756e+09 72463
## + Policy 8 1.0041e+07 7.6491e+09 72464
## - Marital.Status 2 2.2096e+07 7.6812e+09 72465
## - Number.of.Open.Complaints 1 3.6454e+07 7.6956e+09 72477
## - EmploymentStatus 4 6.4492e+07 7.7236e+09 72489
## - Monthly.Premium.Auto 1 4.6240e+08 8.1215e+09 72751
## - Number.of.Policies 2 2.3955e+10 3.1614e+10 79664
##
## Step: AIC=72449.29
## Customer.Lifetime.Value ~ State + Response + Coverage + Education +
## EmploymentStatus + Gender + Income + Location.Code + Marital.Status +
## Monthly.Premium.Auto + Months.Since.Last.Claim + Months.Since.Policy.Inception +
## Number.of.Open.Complaints + Number.of.Policies + Renew.Offer.Type +
## Sales.Channel + Total.Claim.Amount + Vehicle.Size
##
## Df Sum of Sq RSS AIC
## - State 4 5.8099e+06 7.6663e+09 72445
## - Sales.Channel 3 3.4426e+06 7.6639e+09 72446
## - Renew.Offer.Type 3 4.4794e+06 7.6650e+09 72446
## - Education 4 7.6098e+06 7.6681e+09 72446
## - Location.Code 2 1.9071e+06 7.6624e+09 72447
## - Total.Claim.Amount 1 1.7749e+06 7.6623e+09 72448
## <none> 7.6605e+09 72449
## - Months.Since.Last.Claim 1 3.0708e+06 7.6636e+09 72449
## - Gender 1 3.3720e+06 7.6639e+09 72450
## - Income 1 3.4603e+06 7.6640e+09 72450
## - Vehicle.Size 2 7.4680e+06 7.6680e+09 72450
## + Policy.Type 2 1.7656e+06 7.6587e+09 72452
## - Response 1 9.2560e+06 7.6698e+09 72453
## + Vehicle.Class 3 1.3873e+06 7.6591e+09 72454
## - Months.Since.Policy.Inception 1 1.6729e+07 7.6772e+09 72458
## + Policy 8 9.8613e+06 7.6506e+09 72459
## - Marital.Status 2 2.2073e+07 7.6826e+09 72460
## - Coverage 2 2.7444e+07 7.6879e+09 72463
## - Number.of.Open.Complaints 1 3.6457e+07 7.6970e+09 72471
## - EmploymentStatus 4 6.4231e+07 7.7247e+09 72484
## - Monthly.Premium.Auto 1 3.2624e+09 1.0923e+10 74252
## - Number.of.Policies 2 2.3954e+10 3.1614e+10 79658
##
## Step: AIC=72445.15
## Customer.Lifetime.Value ~ Response + Coverage + Education + EmploymentStatus +
## Gender + Income + Location.Code + Marital.Status + Monthly.Premium.Auto +
## Months.Since.Last.Claim + Months.Since.Policy.Inception +
## Number.of.Open.Complaints + Number.of.Policies + Renew.Offer.Type +
## Sales.Channel + Total.Claim.Amount + Vehicle.Size
##
## Df Sum of Sq RSS AIC
## - Sales.Channel 3 3.4233e+06 7.6697e+09 72441
## - Renew.Offer.Type 3 4.5536e+06 7.6709e+09 72442
## - Education 4 7.6579e+06 7.6740e+09 72442
## - Location.Code 2 1.9257e+06 7.6682e+09 72442
## - Total.Claim.Amount 1 1.9371e+06 7.6682e+09 72444
## <none> 7.6663e+09 72445
## - Months.Since.Last.Claim 1 3.0692e+06 7.6694e+09 72445
## - Gender 1 3.4511e+06 7.6698e+09 72445
## - Income 1 3.4712e+06 7.6698e+09 72445
## - Vehicle.Size 2 7.5226e+06 7.6738e+09 72446
## + Policy.Type 2 1.8844e+06 7.6644e+09 72448
## + State 4 5.8099e+06 7.6605e+09 72449
## - Response 1 9.2926e+06 7.6756e+09 72449
## + Vehicle.Class 3 1.4253e+06 7.6649e+09 72450
## - Months.Since.Policy.Inception 1 1.6610e+07 7.6829e+09 72454
## + Policy 8 1.0154e+07 7.6562e+09 72454
## - Marital.Status 2 2.2276e+07 7.6886e+09 72456
## - Coverage 2 2.7441e+07 7.6938e+09 72459
## - Number.of.Open.Complaints 1 3.6304e+07 7.7026e+09 72467
## - EmploymentStatus 4 6.4356e+07 7.7307e+09 72480
## - Monthly.Premium.Auto 1 3.2574e+09 1.0924e+10 74245
## - Number.of.Policies 2 2.3955e+10 3.1621e+10 79651
##
## Step: AIC=72441.42
## Customer.Lifetime.Value ~ Response + Coverage + Education + EmploymentStatus +
## Gender + Income + Location.Code + Marital.Status + Monthly.Premium.Auto +
## Months.Since.Last.Claim + Months.Since.Policy.Inception +
## Number.of.Open.Complaints + Number.of.Policies + Renew.Offer.Type +
## Total.Claim.Amount + Vehicle.Size
##
## Df Sum of Sq RSS AIC
## - Education 4 7.6644e+06 7.6774e+09 72439
## - Renew.Offer.Type 3 4.7843e+06 7.6745e+09 72439
## - Location.Code 2 1.9566e+06 7.6717e+09 72439
## - Total.Claim.Amount 1 1.9865e+06 7.6717e+09 72441
## <none> 7.6697e+09 72441
## - Months.Since.Last.Claim 1 3.0441e+06 7.6728e+09 72441
## - Income 1 3.4271e+06 7.6732e+09 72442
## - Gender 1 3.4993e+06 7.6732e+09 72442
## - Vehicle.Size 2 7.6497e+06 7.6774e+09 72442
## + Policy.Type 2 2.0379e+06 7.6677e+09 72444
## + Sales.Channel 3 3.4233e+06 7.6663e+09 72445
## - Response 1 8.9947e+06 7.6787e+09 72445
## + State 4 5.7905e+06 7.6639e+09 72446
## + Vehicle.Class 3 1.4334e+06 7.6683e+09 72446
## - Months.Since.Policy.Inception 1 1.6200e+07 7.6859e+09 72450
## + Policy 8 1.0165e+07 7.6596e+09 72451
## - Marital.Status 2 2.2219e+07 7.6920e+09 72452
## - Coverage 2 2.7695e+07 7.6974e+09 72456
## - Number.of.Open.Complaints 1 3.6511e+07 7.7062e+09 72464
## - EmploymentStatus 4 6.4840e+07 7.7346e+09 72476
## - Monthly.Premium.Auto 1 3.2569e+09 1.0927e+10 74240
## - Number.of.Policies 2 2.3957e+10 3.1626e+10 79646
##
## Step: AIC=72438.5
## Customer.Lifetime.Value ~ Response + Coverage + EmploymentStatus +
## Gender + Income + Location.Code + Marital.Status + Monthly.Premium.Auto +
## Months.Since.Last.Claim + Months.Since.Policy.Inception +
## Number.of.Open.Complaints + Number.of.Policies + Renew.Offer.Type +
## Total.Claim.Amount + Vehicle.Size
##
## Df Sum of Sq RSS AIC
## - Renew.Offer.Type 3 4.6009e+06 7.6820e+09 72436
## - Location.Code 2 1.7688e+06 7.6792e+09 72436
## - Total.Claim.Amount 1 2.1381e+06 7.6795e+09 72438
## <none> 7.6774e+09 72439
## - Months.Since.Last.Claim 1 3.1116e+06 7.6805e+09 72439
## - Income 1 3.3716e+06 7.6808e+09 72439
## - Gender 1 3.4424e+06 7.6808e+09 72439
## - Vehicle.Size 2 7.8655e+06 7.6853e+09 72440
## + Policy.Type 2 2.1156e+06 7.6753e+09 72441
## + Education 4 7.6644e+06 7.6697e+09 72441
## + Sales.Channel 3 3.4298e+06 7.6740e+09 72442
## + State 4 5.8331e+06 7.6716e+09 72443
## - Response 1 9.3335e+06 7.6867e+09 72443
## + Vehicle.Class 3 1.3988e+06 7.6760e+09 72444
## - Months.Since.Policy.Inception 1 1.6500e+07 7.6939e+09 72447
## + Policy 8 1.0351e+07 7.6670e+09 72448
## - Marital.Status 2 2.3181e+07 7.7006e+09 72450
## - Coverage 2 2.7109e+07 7.7045e+09 72452
## - Number.of.Open.Complaints 1 3.6299e+07 7.7137e+09 72460
## - EmploymentStatus 4 6.4868e+07 7.7423e+09 72473
## - Monthly.Premium.Auto 1 3.2608e+09 1.0938e+10 74238
## - Number.of.Policies 2 2.4021e+10 3.1698e+10 79649
##
## Step: AIC=72435.55
## Customer.Lifetime.Value ~ Response + Coverage + EmploymentStatus +
## Gender + Income + Location.Code + Marital.Status + Monthly.Premium.Auto +
## Months.Since.Last.Claim + Months.Since.Policy.Inception +
## Number.of.Open.Complaints + Number.of.Policies + Total.Claim.Amount +
## Vehicle.Size
##
## Df Sum of Sq RSS AIC
## - Location.Code 2 1.7360e+06 7.6837e+09 72433
## - Total.Claim.Amount 1 1.9583e+06 7.6840e+09 72435
## - Income 1 2.9924e+06 7.6850e+09 72436
## <none> 7.6820e+09 72436
## - Months.Since.Last.Claim 1 3.0446e+06 7.6850e+09 72436
## - Gender 1 3.5435e+06 7.6855e+09 72436
## - Vehicle.Size 2 7.5915e+06 7.6896e+09 72437
## + Policy.Type 2 2.0816e+06 7.6799e+09 72438
## + Renew.Offer.Type 3 4.6009e+06 7.6774e+09 72439
## + Education 4 7.4811e+06 7.6745e+09 72439
## + Sales.Channel 3 3.6840e+06 7.6783e+09 72439
## - Response 1 9.0126e+06 7.6910e+09 72440
## + State 4 5.9100e+06 7.6761e+09 72440
## + Vehicle.Class 3 1.3791e+06 7.6806e+09 72441
## - Months.Since.Policy.Inception 1 1.5797e+07 7.6978e+09 72444
## + Policy 8 1.0153e+07 7.6718e+09 72445
## - Marital.Status 2 2.2065e+07 7.7041e+09 72446
## - Coverage 2 2.7254e+07 7.7093e+09 72450
## - Number.of.Open.Complaints 1 3.5197e+07 7.7172e+09 72457
## - EmploymentStatus 4 6.3869e+07 7.7459e+09 72470
## - Monthly.Premium.Auto 1 3.3096e+09 1.0992e+10 74256
## - Number.of.Policies 2 2.4741e+10 3.2423e+10 79758
##
## Step: AIC=72432.7
## Customer.Lifetime.Value ~ Response + Coverage + EmploymentStatus +
## Gender + Income + Marital.Status + Monthly.Premium.Auto +
## Months.Since.Last.Claim + Months.Since.Policy.Inception +
## Number.of.Open.Complaints + Number.of.Policies + Total.Claim.Amount +
## Vehicle.Size
##
## Df Sum of Sq RSS AIC
## - Total.Claim.Amount 1 6.3751e+05 7.6844e+09 72431
## - Months.Since.Last.Claim 1 2.9557e+06 7.6867e+09 72433
## <none> 7.6837e+09 72433
## - Income 1 3.6235e+06 7.6874e+09 72433
## - Gender 1 3.7022e+06 7.6874e+09 72433
## - Vehicle.Size 2 8.3326e+06 7.6921e+09 72434
## + Policy.Type 2 2.1562e+06 7.6816e+09 72435
## + Location.Code 2 1.7360e+06 7.6820e+09 72436
## + Renew.Offer.Type 3 4.5681e+06 7.6792e+09 72436
## + Education 4 7.2930e+06 7.6764e+09 72436
## - Response 1 8.2597e+06 7.6920e+09 72436
## + Sales.Channel 3 3.7246e+06 7.6800e+09 72436
## + State 4 5.9367e+06 7.6778e+09 72437
## + Vehicle.Class 3 1.3893e+06 7.6823e+09 72438
## - Months.Since.Policy.Inception 1 1.5955e+07 7.6997e+09 72441
## + Policy 8 1.0312e+07 7.6734e+09 72442
## - Marital.Status 2 2.1545e+07 7.7053e+09 72443
## - Coverage 2 2.6879e+07 7.7106e+09 72446
## - Number.of.Open.Complaints 1 3.5393e+07 7.7191e+09 72454
## - EmploymentStatus 4 6.4499e+07 7.7482e+09 72467
## - Monthly.Premium.Auto 1 4.4837e+09 1.2167e+10 74769
## - Number.of.Policies 2 2.4739e+10 3.2423e+10 79754
##
## Step: AIC=72431.12
## Customer.Lifetime.Value ~ Response + Coverage + EmploymentStatus +
## Gender + Income + Marital.Status + Monthly.Premium.Auto +
## Months.Since.Last.Claim + Months.Since.Policy.Inception +
## Number.of.Open.Complaints + Number.of.Policies + Vehicle.Size
##
## Df Sum of Sq RSS AIC
## - Months.Since.Last.Claim 1 2.9838e+06 7.6874e+09 72431
## <none> 7.6844e+09 72431
## - Income 1 3.2174e+06 7.6876e+09 72431
## - Gender 1 3.4614e+06 7.6878e+09 72431
## - Vehicle.Size 2 7.8152e+06 7.6922e+09 72432
## + Total.Claim.Amount 1 6.3751e+05 7.6837e+09 72433
## + Policy.Type 2 2.1597e+06 7.6822e+09 72434
## + Education 4 7.4794e+06 7.6769e+09 72434
## + Renew.Offer.Type 3 4.4394e+06 7.6799e+09 72434
## + Sales.Channel 3 3.7303e+06 7.6806e+09 72435
## - Response 1 8.5652e+06 7.6929e+09 72435
## + Location.Code 2 4.1526e+05 7.6840e+09 72435
## + State 4 6.0283e+06 7.6783e+09 72435
## + Vehicle.Class 3 1.4433e+06 7.6829e+09 72436
## - Months.Since.Policy.Inception 1 1.6114e+07 7.7005e+09 72440
## + Policy 8 1.0339e+07 7.6740e+09 72440
## - Marital.Status 2 2.0935e+07 7.7053e+09 72441
## - Coverage 2 2.7035e+07 7.7114e+09 72445
## - Number.of.Open.Complaints 1 3.5446e+07 7.7198e+09 72453
## - EmploymentStatus 4 6.3885e+07 7.7483e+09 72465
## - Monthly.Premium.Auto 1 5.4545e+09 1.3139e+10 75158
## - Number.of.Policies 2 2.4740e+10 3.2425e+10 79752
##
## Step: AIC=72431.1
## Customer.Lifetime.Value ~ Response + Coverage + EmploymentStatus +
## Gender + Income + Marital.Status + Monthly.Premium.Auto +
## Months.Since.Policy.Inception + Number.of.Open.Complaints +
## Number.of.Policies + Vehicle.Size
##
## Df Sum of Sq RSS AIC
## <none> 7.6874e+09 72431
## + Months.Since.Last.Claim 1 2.9838e+06 7.6844e+09 72431
## - Income 1 3.5107e+06 7.6909e+09 72431
## - Gender 1 3.5340e+06 7.6909e+09 72431
## - Vehicle.Size 2 7.7985e+06 7.6952e+09 72432
## + Total.Claim.Amount 1 6.6564e+05 7.6867e+09 72433
## + Policy.Type 2 2.2362e+06 7.6851e+09 72434
## + Education 4 7.5526e+06 7.6798e+09 72434
## + Renew.Offer.Type 3 4.3723e+06 7.6830e+09 72434
## + Sales.Channel 3 3.6938e+06 7.6837e+09 72435
## + Location.Code 2 4.1757e+05 7.6869e+09 72435
## - Response 1 8.7344e+06 7.6961e+09 72435
## + State 4 6.0297e+06 7.6813e+09 72435
## + Vehicle.Class 3 1.4438e+06 7.6859e+09 72436
## - Months.Since.Policy.Inception 1 1.5357e+07 7.7027e+09 72439
## + Policy 8 1.0484e+07 7.6769e+09 72440
## - Marital.Status 2 2.0809e+07 7.7082e+09 72441
## - Coverage 2 2.7092e+07 7.7144e+09 72445
## - Number.of.Open.Complaints 1 3.5663e+07 7.7230e+09 72453
## - EmploymentStatus 4 6.3326e+07 7.7507e+09 72465
## - Monthly.Premium.Auto 1 5.4593e+09 1.3147e+10 75159
## - Number.of.Policies 2 2.4738e+10 3.2425e+10 79751
summary(model)
##
## Call:
## lm(formula = Customer.Lifetime.Value ~ Response + Coverage +
## EmploymentStatus + Gender + Income + Marital.Status + Monthly.Premium.Auto +
## Months.Since.Policy.Inception + Number.of.Open.Complaints +
## Number.of.Policies + Vehicle.Size, data = trainData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3231.0 -435.1 22.6 442.1 7059.1
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.988e+03 1.361e+02 -14.611 < 2e-16 ***
## ResponseYes 1.225e+02 5.105e+01 2.400 0.016437 *
## CoverageExtended 1.349e+01 4.241e+01 0.318 0.750398
## CoveragePremium 3.338e+02 8.084e+01 4.129 3.71e-05 ***
## EmploymentStatusEmployed 3.352e+02 9.244e+01 3.626 0.000291 ***
## EmploymentStatusMedical Leave 8.404e+01 1.152e+02 0.729 0.465882
## EmploymentStatusRetired -4.023e+01 1.297e+02 -0.310 0.756411
## EmploymentStatusUnemployed -9.293e+01 9.393e+01 -0.989 0.322548
## GenderM -5.294e+01 3.468e+01 -1.527 0.126938
## Income 1.467e-03 9.643e-04 1.521 0.128197
## Marital.StatusMarried 6.934e+01 5.064e+01 1.369 0.170978
## Marital.StatusSingle -9.233e+01 5.894e+01 -1.566 0.117318
## Monthly.Premium.Auto 6.088e+01 1.015e+00 59.998 < 2e-16 ***
## Months.Since.Policy.Inception -1.967e+00 6.181e-01 -3.182 0.001470 **
## Number.of.Open.Complaints -9.071e+01 1.871e+01 -4.849 1.28e-06 ***
## Number.of.Policies>3 3.232e+03 4.241e+01 76.214 < 2e-16 ***
## Number.of.Policies2-3 5.193e+03 4.133e+01 125.638 < 2e-16 ***
## Vehicle.SizeMedsize -6.071e+01 5.704e+01 -1.064 0.287204
## Vehicle.SizeSmall -1.415e+02 6.654e+01 -2.127 0.033481 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1231 on 5069 degrees of freedom
## Multiple R-squared: 0.8069, Adjusted R-squared: 0.8062
## F-statistic: 1176 on 18 and 5069 DF, p-value: < 2.2e-16
vif(model)
## GVIF Df GVIF^(1/(2*Df))
## Response 1.103689 1 1.050566
## Coverage 1.338046 2 1.075518
## EmploymentStatus 3.393559 4 1.165016
## Gender 1.008177 1 1.004080
## Income 2.847800 1 1.687543
## Marital.Status 1.177034 2 1.041591
## Monthly.Premium.Auto 1.334899 1 1.155378
## Months.Since.Policy.Inception 1.002681 1 1.001340
## Number.of.Open.Complaints 1.004348 1 1.002171
## Number.of.Policies 1.010246 2 1.002552
## Vehicle.Size 1.015671 2 1.003895
model = step(lm(Customer.Lifetime.Value~I(Coverage=="Premium")+
I(EmploymentStatus=="Employed")+
Monthly.Premium.Auto +
Months.Since.Policy.Inception+
Number.of.Open.Complaints+
I(Number.of.Policies == "<2")+
I(Number.of.Policies == "2-3")+
I(Number.of.Policies == ">3")
, data = trainData), direction = "both")
## Start: AIC=72452.57
## Customer.Lifetime.Value ~ I(Coverage == "Premium") + I(EmploymentStatus ==
## "Employed") + Monthly.Premium.Auto + Months.Since.Policy.Inception +
## Number.of.Open.Complaints + I(Number.of.Policies == "<2") +
## I(Number.of.Policies == "2-3") + I(Number.of.Policies ==
## ">3")
##
##
## Step: AIC=72452.57
## Customer.Lifetime.Value ~ I(Coverage == "Premium") + I(EmploymentStatus ==
## "Employed") + Monthly.Premium.Auto + Months.Since.Policy.Inception +
## Number.of.Open.Complaints + I(Number.of.Policies == "<2") +
## I(Number.of.Policies == "2-3")
##
## Df Sum of Sq RSS AIC
## <none> 7.7533e+09 72453
## - Months.Since.Policy.Inception 1 14467634 7.7678e+09 72460
## - I(Coverage == "Premium") 1 26797427 7.7801e+09 72468
## - Number.of.Open.Complaints 1 37345279 7.7907e+09 72475
## - I(EmploymentStatus == "Employed") 1 280078275 8.0334e+09 72631
## - I(Number.of.Policies == "2-3") 1 2997053531 1.0750e+10 74113
## - Monthly.Premium.Auto 1 6569810186 1.4323e+10 75573
## - I(Number.of.Policies == "<2") 1 8833377737 1.6587e+10 76320
summary(model)
##
## Call:
## lm(formula = Customer.Lifetime.Value ~ I(Coverage == "Premium") +
## I(EmploymentStatus == "Employed") + Monthly.Premium.Auto +
## Months.Since.Policy.Inception + Number.of.Open.Complaints +
## I(Number.of.Policies == "<2") + I(Number.of.Policies == "2-3"),
## data = trainData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3336.1 -460.2 34.0 440.4 7045.5
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 1078.3298 91.3251 11.808
## I(Coverage == "Premium")TRUE 325.2615 77.6244 4.190
## I(EmploymentStatus == "Employed")TRUE 492.7177 36.3723 13.547
## Monthly.Premium.Auto 61.4341 0.9364 65.609
## Months.Since.Policy.Inception -1.9074 0.6195 -3.079
## Number.of.Open.Complaints -92.6752 18.7352 -4.947
## I(Number.of.Policies == "<2")TRUE -3230.5683 42.4646 -76.077
## I(Number.of.Policies == "2-3")TRUE 1961.2182 44.2579 44.313
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## I(Coverage == "Premium")TRUE 2.83e-05 ***
## I(EmploymentStatus == "Employed")TRUE < 2e-16 ***
## Monthly.Premium.Auto < 2e-16 ***
## Months.Since.Policy.Inception 0.00209 **
## Number.of.Open.Complaints 7.80e-07 ***
## I(Number.of.Policies == "<2")TRUE < 2e-16 ***
## I(Number.of.Policies == "2-3")TRUE < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1235 on 5080 degrees of freedom
## Multiple R-squared: 0.8052, Adjusted R-squared: 0.8049
## F-statistic: 3000 on 7 and 5080 DF, p-value: < 2.2e-16
vif(model)
## I(Coverage == "Premium") I(EmploymentStatus == "Employed")
## 1.128470 1.003126
## Monthly.Premium.Auto Months.Since.Policy.Inception
## 1.129449 1.000720
## Number.of.Open.Complaints I(Number.of.Policies == "<2")
## 1.001026 1.423118
## I(Number.of.Policies == "2-3")
## 1.425878
par(mfrow=c(2,2))
plot(model)

dwtest(model)
##
## Durbin-Watson test
##
## data: model
## DW = 1.9952, p-value = 0.4326
## alternative hypothesis: true autocorrelation is greater than 0
RMSError = RMSE(model$fitted.values, trainData$Customer.Lifetime.Value, na.rm = TRUE)
MAPE = 100*sum(abs(model$residuals/trainData$Customer.Lifetime.Value))/nrow(trainData)
RMSError
## [1] 1234.441
MAPE
## [1] 13.86105

## [1] 0.8988717
## [1] 1242.833
## [1] 13.9844
#Model is a good fit
model
##
## Call:
## lm(formula = Customer.Lifetime.Value ~ I(Coverage == "Premium") +
## I(EmploymentStatus == "Employed") + Monthly.Premium.Auto +
## Months.Since.Policy.Inception + Number.of.Open.Complaints +
## I(Number.of.Policies == "<2") + I(Number.of.Policies == "2-3"),
## data = trainData)
##
## Coefficients:
## (Intercept)
## 1078.330
## I(Coverage == "Premium")TRUE
## 325.261
## I(EmploymentStatus == "Employed")TRUE
## 492.718
## Monthly.Premium.Auto
## 61.434
## Months.Since.Policy.Inception
## -1.907
## Number.of.Open.Complaints
## -92.675
## I(Number.of.Policies == "<2")TRUE
## -3230.568
## I(Number.of.Policies == "2-3")TRUE
## 1961.218